# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By   : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/decisionTree.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.utils.consts import VALID_INPUT_TYPES


def decisionTree(X: Matrix,
                 y: Matrix,
                 ctypes: Matrix,
                 **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Train a single CART (classification and regression tree) decision tree.

    Wrapper for the ``decisionTree`` builtin script, which implements decision
    trees for recoded and binned categorical and numerical input features.
    Depending on the provided labels y, the tree is either a classification
    tree (majority vote per leaf) or a regression tree (average per leaf).

    .. code-block::

       For example, given a feature matrix with features [a,b,c,d]
       and the following tree, M would look as follows:

       (L1)               |d<5|
                         /     \\
       (L2)           P1:2    |a<7|
                               /   \\
       (L3)                 P2:2 P3:1

       --> M :=
       [[4, 5, 0, 2, 1, 7, 0, 0, 0, 0, 0, 2, 0, 1]]
        |(L1)| |  (L2)   | |        (L3)         |

    :param X: Feature matrix in recoded/binned representation
    :param y: Label matrix in recoded/binned representation
    :param ctypes: Row-vector of column types [1 scale/ordinal, 2 categorical]
        of shape 1-by-(ncol(X)+1), where the last entry is the type of y
    :param max_depth: Maximum depth of the learned tree (stopping criterion)
    :param min_leaf: Minimum number of samples in leaf nodes (stopping
        criterion); an odd number is recommended to avoid 50/50 leaf label
        decisions
    :param min_split: Minimum number of samples in a leaf for attempting a
        split
    :param max_features: Parameter controlling the number of features used as
        split candidates at tree nodes: m = ceil(num_features^max_features)
    :param max_values: Parameter controlling the number of values per feature
        used as split candidates: nb = ceil(num_values^max_values)
    :param max_dataratio: Parameter in [0,1] controlling when to materialize
        data subsets of X and y on node splits. When set to 0, the original
        X and y are always scanned, which avoids allocating and maintaining
        data for all active nodes. When set to 0.01, data is rematerialized
        whenever the sub-tree data would be less than 1% of the data size
        last materialized for the parent.
    :param impurity: Impurity measure: entropy, gini (default), rss (regression)
    :param seed: Fixed seed for randomization of samples and split candidates
    :param verbose: Flag indicating verbose debug output
    :return: Matrix M containing the learned trees, in linearized form
    """

    # Mandatory inputs first, then the optional keyword arguments in the
    # order the caller supplied them.
    named_inputs = {'X': X, 'y': y, 'ctypes': ctypes, **kwargs}
    return Matrix(X.sds_context, 'decisionTree', named_input_nodes=named_inputs)
